/* This file is part of swapper project
 *
 * Copyright (C) 2020 The Swapper Project Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.swapper.regexp;

import java.nio.CharBuffer;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Objects;

/**
 * The regular expression pattern parser.
 * Parses regular expressions, but supports only partial syntax.
 */
final class PatternParser {
  private static final int OPERATOR_EITHER = 0;
  private static final int OPERATOR_CONCAT = 1;
  private static final int OPERATOR_OPENING = Integer.MIN_VALUE;
  private static final int OPERATOR_CLOSING = Integer.MAX_VALUE;

  public static PatternNode parse(PatternReverser context, String pattern) {
    return new PatternParser(context, pattern).parse();
  }

  private final PatternReverser context;
  private final CharBuffer buffer;
  private final Deque<PatternNode> nodes = new ArrayDeque<>();
  private final Deque<Integer> operators = new ArrayDeque<>();

  /**
   * Create a regular expression random node parser.
   *
   * @param context the generator context.
   * @param pattern the regular expression pattern.
   * @throws NullPointerException if {@code context} or {@code pattern} is null.
   */
  private PatternParser(PatternReverser context, String pattern) {
    this.context = Objects.requireNonNull(context);
    if (pattern.isEmpty()) {
      throw new IllegalArgumentException("The regular expression pattern is empty!");
    }
    buffer = CharBuffer.wrap(pattern);
  }

  /**
   * Gets next buffer character and position move to next character.
   *
   * @return next buffer character.
   * @throws IndexOutOfBoundsException if position not smaller than the buffer's limit.
   */
  private int peek() {
    char before = buffer.get();
    if (buffer.hasRemaining()) {
      int position = buffer.position();
      char after = buffer.get(position);
      if (Character.isSurrogatePair(before, after)) {
        buffer.position(position + 1);
        return Character.toCodePoint(before, after);
      }
    }
    return before;
  }

  /**
   * Parse the regular expression pattern.
   * Use binary tree inorder traversal(LDR).
   *
   * @return the pattern node.
   */
  private PatternNode parse() {
    int peek;
    boolean needPushConcat = false;
    while (true) {
      if (!buffer.hasRemaining()) {
        handleOperatorsWhenEnding();
        return nodes.pop();
      }
      switch (peek = peek()) {
        case '(':
          if (needPushConcat) {
            handleOperatorsWhenPushConcat();
          }
          operators.push(OPERATOR_OPENING);
          needPushConcat = false;
          break;
        case ')':
          handleOperatorsWhenClosing();
          needPushConcat = true;
          break;
        case '|':
          while (!operators.isEmpty()) {
            Integer operator = operators.peek();
            if (operator < OPERATOR_EITHER) {
              break;
            }
            handleStackTopOperator(operators.pop());
          }
          operators.push(OPERATOR_EITHER);
          needPushConcat = false;
          break;
        case '?':
          nodes.push(new QuantifierNode(context, nodes.pop(), Quantifier.ZERO_OR_ONE));
          needPushConcat = true;
          break;
        case '*':
          nodes.push(new QuantifierNode(context, nodes.pop(), Quantifier.ZERO_OR_MORE));
          needPushConcat = true;
          break;
        case '+':
          nodes.push(new QuantifierNode(context, nodes.pop(), Quantifier.ONE_OR_MORE));
          needPushConcat = true;
          break;
        case '{':
          nodes.push(new QuantifierNode(context, nodes.pop(), nextQuantifier()));
          needPushConcat = true;
          break;
        case '.':
          if (needPushConcat) {
            handleOperatorsWhenPushConcat();
          }
          if (context.isNegatedWithinGraph()) {
            nodes.push(new CharacterClassesNode(context, CharacterClasses.ANY_WITHIN_GRAPH));
          } else {
            nodes.push(new CharacterClassesNode(context, CharacterClasses.ANY));
          }
          needPushConcat = true;
          break;
        case '\\':
          if (needPushConcat) {
            handleOperatorsWhenPushConcat();
          }
          nodes.push(nextEscapeOrPredefinedCharacterClasses());
          needPushConcat = true;
          break;
        case '[':
          if (needPushConcat) {
            handleOperatorsWhenPushConcat();
          }
          nodes.push(nextCharacterClasses());
          needPushConcat = true;
          break;
        default:
          if (needPushConcat) {
            handleOperatorsWhenPushConcat();
          }
          nodes.push(new CharacterNode(context, peek));
          needPushConcat = true;
          break;
      }
    }
  }

  /**
   * Handle stack operators when push concat operator.
   * Handle stack top operators util meet priority less than concat operator.
   */
  private void handleOperatorsWhenPushConcat() {
    while (!operators.isEmpty()) {
      Integer top = operators.peek();
      if (top < OPERATOR_CONCAT) {
        break;
      }
      handleStackTopOperator(operators.pop());
    }
    operators.push(OPERATOR_CONCAT);
  }

  /**
   * Handle stack operators when closing.
   * Handle stack top operators util meet opening operator.
   */
  private void handleOperatorsWhenClosing() {
    while (true) {
      Integer pop = operators.pop();
      if (pop == OPERATOR_OPENING) {
        break;
      }
      handleStackTopOperator(pop);
    }
  }

  /**
   * Handle stack operators when ending.
   * Clear and handle all operators.
   */
  private void handleOperatorsWhenEnding() {
    while (!operators.isEmpty()) {
      handleStackTopOperator(operators.pop());
    }
  }

  private void handleStackTopOperator(Integer operator) {
    PatternNode left, right;
    switch (operator) {
      case OPERATOR_EITHER:
        right = nodes.pop();
        left = nodes.pop();
        nodes.push(new EitherNode(context, left, right));
        break;
      case OPERATOR_CONCAT:
        right = nodes.pop();
        left = nodes.pop();
        nodes.push(new ConcatNode(context, left, right));
        break;
      default:
        throw new IllegalStateException("Cannot handle top of stack operators: " + operator);
    }
  }

  /**
   * Parse the next quantifier.
   * Quantifier are one of the following three types:
   * {n}    Match exactly n times;
   * {n,}   Match at least n times;
   * {n,m}  Match at least n but not more than m times.
   *
   * @return next quantifier.
   */
  private Quantifier nextQuantifier() {
    int peek;
    StringBuilder builder = new StringBuilder();
    boolean hasLeastTimes = false;
    while ((peek = peek()) >= '0' && peek <= '9') {
      hasLeastTimes = true;
      builder.append((char) peek);
    }
    if (!hasLeastTimes) {
      throw new IllegalArgumentException("Quantifier requires a least times.");
    }
    if (peek == ',') {
      builder.append(',');
    } else if (peek == '}') {
      return Quantifier.ofExactly(Integer.parseInt(builder.toString()));
    } else {
      throw new IllegalArgumentException("Quantifier should end with '}'.");
    }
    boolean hasMostTimes = false;
    while ((peek = peek()) >= '0' && peek <= '9') {
      hasMostTimes = true;
      builder.append((char) peek);
    }
    if (peek == '}') {
      String[] numbers = builder.toString().split(",", 2);
      if (!hasMostTimes) {
        return Quantifier.ofLeast(Integer.parseInt(numbers[0]));
      } else {
        return Quantifier.ofBetween(Integer.parseInt(numbers[0]), Integer.parseInt(numbers[1]));
      }
    } else {
      throw new IllegalArgumentException("Quantifier should end with '}'.");
    }
  }

  /**
   * Parsing escape character or predefined character classes.
   * Escape format:
   * 1.'\t' The tab character ('\u0009')
   * 2.'\n' The newline (line feed) character ('\u000A')
   * 3.'\r' The carriage-return character ('\u000D')
   * 4.'\f' The form-feed character ('\u000C')
   * 5.'\a' The alert (bell) character ('\u0007')
   * 6.'\e' The escape character ('\u001B')
   * Predefined character classes:
   * 0.'.'   Any character (may or may not match line terminators)
   * 1.'\d'  A digit: [0-9]
   * 2.'\D'  A non-digit: [^0-9]
   * 3.'\h'  A horizontal whitespace character: [ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]
   * 4.'\H'  A non-horizontal whitespace character: [^\h]
   * 5.'\s'  A whitespace character: [ \t\n\x0B\f\r]
   * 6.'\S'  A non-whitespace character: [^\s]
   * 7.'\v'  A vertical whitespace character: [\n\x0B\f\r\x85\u2028\u2029]
   * 8.'\V'  A non-vertical whitespace character: [^\v]
   * 9.'\w'  A word character: [a-zA-Z_0-9]
   * 10.'\W' A non-word character: [^\w]
   *
   * @return the escape character node or predefined character classes node.
   */
  private PatternNode nextEscapeOrPredefinedCharacterClasses() {
    int peek;
    switch (peek = peek()) {
      case 't':
        return new CharacterNode(context, '\t');
      case 'n':
        return new CharacterNode(context, '\n');
      case 'r':
        return new CharacterNode(context, '\r');
      case 'f':
        return new CharacterNode(context, '\f');
      case 'a':
        return new CharacterNode(context, '\u0007');
      case 'e':
        return new CharacterNode(context, '\u001b');
      case 'p':
        return nextPosixCharacterClasses();
      case '0':
        return nextOctalCharacter();
      case 'u':
        return nextLongHexadecimalCharacter();
      case 'x':
        return nextShortOrUnicodeHexadecimalCharacter();
      case 'd':
        return new CharacterClassesNode(context, CharacterClasses.DIGIT);
      case 'D':
        if (context.isNegatedWithinGraph()) {
          return new CharacterClassesNode(context, CharacterClasses.NON_DIGIT_WITHIN_GRAPH);
        } else {
          return new CharacterClassesNode(context, CharacterClasses.NON_DIGIT);
        }
      case 's':
        return new CharacterClassesNode(context, CharacterClasses.WHITESPACE);
      case 'S':
        if (context.isNegatedWithinGraph()) {
          return new CharacterClassesNode(context, CharacterClasses.NON_WHITESPACE_WITHIN_GRAPH);
        } else {
          return new CharacterClassesNode(context, CharacterClasses.NON_WHITESPACE);
        }
      case 'w':
        return new CharacterClassesNode(context, CharacterClasses.WORD);
      case 'W':
        if (context.isNegatedWithinGraph()) {
          return new CharacterClassesNode(context, CharacterClasses.NON_WORD_WITHIN_GRAPH);
        } else {
          return new CharacterClassesNode(context, CharacterClasses.NON_WORD);
        }
      case 'h':
        return new CharacterClassesNode(context, CharacterClasses.HORIZONTAL_WHITESPACE);
      case 'H':
        if (context.isNegatedWithinGraph()) {
          return new CharacterClassesNode(context, CharacterClasses.NON_HORIZONTAL_WHITESPACE_WITHIN_GRAPH);
        } else {
          return new CharacterClassesNode(context, CharacterClasses.NON_HORIZONTAL_WHITESPACE);
        }
      case 'v':
        return new CharacterClassesNode(context, CharacterClasses.VERTICAL_WHITESPACE);
      case 'V':
        if (context.isNegatedWithinGraph()) {
          return new CharacterClassesNode(context, CharacterClasses.NON_VERTICAL_WHITESPACE_WITHIN_GRAPH);
        } else {
          return new CharacterClassesNode(context, CharacterClasses.NON_VERTICAL_WHITESPACE);
        }
      default:
        return new CharacterNode(context, peek);
    }
  }

  /**
   * Parsing octal value character format.
   * 1.'\0n'    The character with octal value 0n (0<=n<=7);
   * 2.'\0nn'   The character with octal value 0nn (0<=n<=7);
   * 3.'\0mnn'  The character with octal value 0mnn (0<=m<=3, 0<=n<=7).
   *
   * @return the character node with octal value, or '0'
   * character node if end of pattern.
   */
  private PatternNode nextOctalCharacter() {
    if (!buffer.hasRemaining()) {
      return new CharacterNode(context, '0');
    }
    int position = buffer.position();
    int peek = peek();
    int oct1 = CharacterHelper.codePointToOct(peek);
    if (oct1 == -1) {
      buffer.position(position);
      return new CharacterNode(context, '0');
    }
    if (!buffer.hasRemaining()) {
      return new CharacterNode(context, (char) oct1);
    }
    int oct2 = CharacterHelper.codePointToOct(peek());
    if (oct2 == -1) {
      buffer.position(position + 1);
      return new CharacterNode(context, (char) oct1);
    }
    if (peek > '3' || !buffer.hasRemaining()) {
      System.out.println();
      return new CharacterNode(context, (char) ((oct1 << 3) + oct2));
    }
    int oct3 = CharacterHelper.codePointToOct(peek());
    if (oct3 == -1) {
      buffer.position(position + 1);
      return new CharacterNode(context, (char) ((oct1 << 3) + oct2));
    }
    return new CharacterNode(context, (char) ((oct1 << 6) + (oct2 << 3) + oct3));
  }

  /**
   * Parsing long hexadecimal value character format.
   * '\\uhhhh' The character with hexadecimal value 0xhhhh;
   *
   * @return the character node with hexadecimal value.
   */
  private PatternNode nextLongHexadecimalCharacter() {
    int dec, hex = 0;
    int position = buffer.position();
    for (int i = 0; i < 4; ++i) {
      if (buffer.hasRemaining()) {
        if ((dec = CharacterHelper.codePointToHex(peek())) != -1) {
          hex += dec << (4 * (3 - i));
        } else {
          buffer.position(position);
          return new CharacterNode(context, 'u');
        }
      } else {
        buffer.position(position);
        return new CharacterNode(context, 'u');
      }
    }
    return new CharacterNode(context, (char) hex);
  }

  /**
   * Parsing short hexadecimal value character format.
   * 1.'\xhh'       The character with hexadecimal value 0xhh;
   * 2.'\x{h...h}'  The character with hexadecimal value 0xh...h
   * ({@link Character#MIN_CODE_POINT} <=0xh...h<= {@link Character#MAX_CODE_POINT}).
   *
   * @return the character node with hexadecimal value, or 'x'
   * character node if end of pattern.
   */
  private PatternNode nextShortOrUnicodeHexadecimalCharacter() {
    if (!buffer.hasRemaining()) {
      return new CharacterNode(context, 'x');
    }
    int position = buffer.position();
    int dec, hex = 0, peek = peek();
    if (peek == '{') {
      StringBuilder builder = new StringBuilder();
      while ((peek = peek()) != '}') {
        if (CharacterHelper.isHexCharacter(peek)) {
          builder.append((char) peek);
          if (builder.length() > 6) {
            throw new IllegalArgumentException("The character with hexadecimal value more than  0x10FFFF.");
          }
        } else {
          throw new IllegalArgumentException("Illegal \\x{h...h}, requires hexadecimal character.");
        }
      }
      if (builder.length() < 1) {
        throw new IllegalArgumentException("Illegal \\x{h...h}, requires hexadecimal character.");
      }
      hex = Integer.parseInt(builder.toString(), 16);
      if (Character.isValidCodePoint(hex)) {
        return new CharacterNode(context, hex);
      } else {
        throw new IllegalArgumentException("The code point is invalid.");
      }
    } else {
      buffer.position(position);
    }
    for (int i = 0; i < 2; ++i) {
      if (buffer.hasRemaining()) {
        if ((dec = CharacterHelper.codePointToHex(peek())) != -1) {
          hex += dec << (4 * (1 - i));
        } else {
          buffer.position(position);
          return new CharacterNode(context, 'x');
        }
      } else {
        buffer.position(position);
        return new CharacterNode(context, 'x');
      }
    }
    return new CharacterNode(context, (char) hex);
  }

  /**
   * Parsing POSIX character classes (US-ASCII only).
   * \p{Lower}  A lower-case alphabetic character: [a-z]
   * \p{Upper}  An upper-case alphabetic character:[A-Z]
   * \p{ASCII}  All ASCII:[\x00-\x7F]
   * \p{Alpha}  An alphabetic character:[\p{Lower}\p{Upper}]
   * \p{Digit}  A decimal digit: [0-9]
   * \p{Alnum}  An alphanumeric character:[\p{Alpha}\p{Digit}]
   * \p{Punct}  Punctuation: One of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
   * \p{Graph}  A visible character: [\p{Alnum}\p{Punct}]
   * \p{Print}  A printable character: [\p{Graph}\x20]
   * \p{Blank}  A space or a tab: [ \t]
   * \p{Cntrl}  A control character: [\x00-\x1F\x7F]
   * \p{XDigit} A hexadecimal digit: [0-9a-fA-F]
   * \p{Space}  A whitespace character: [ \t\n\x0B\f\r]
   *
   * @return the POSIX character classes, or first character node
   * if format not '\p{POSIX-NAME}'.
   */
  private PatternNode nextPosixCharacterClasses() {
    int peek;
    if ((peek = peek()) != '{') {
      return new CharacterNode(context, peek);
    }
    StringBuilder builder = new StringBuilder();
    while ((peek = peek()) != '}') {
      builder.append((char) peek);
    }
    String name = builder.toString();
    switch (name) {
      case "Lower":
      case "IsLowercase":
        return new CharacterClassesNode(context, CharacterClasses.POSIX_LOWER);
      case "Upper":
      case "IsUppercase":
        return new CharacterClassesNode(context, CharacterClasses.POSIX_UPPER);
      case "ASCII":
        return new CharacterClassesNode(context, CharacterClasses.POSIX_ASCII);
      case "Alpha":
      case "IsAlphabetic":
        return new CharacterClassesNode(context, CharacterClasses.POSIX_ALPHA);
      case "Digit":
      case "IsDigit":
        return new CharacterClassesNode(context, CharacterClasses.POSIX_DIGIT);
      case "Alnum":
        return new CharacterClassesNode(context, CharacterClasses.POSIX_ALNUM);
      case "Punct":
      case "IsPunctuation":
        return new CharacterClassesNode(context, CharacterClasses.POSIX_PUNCT);
      case "Graph":
        return new CharacterClassesNode(context, CharacterClasses.POSIX_GRAPH);
      case "Print":
        return new CharacterClassesNode(context, CharacterClasses.POSIX_PRINT);
      case "Blank":
        return new CharacterClassesNode(context, CharacterClasses.POSIX_BLANK);
      case "Cntrl":
        return new CharacterClassesNode(context, CharacterClasses.POSIX_CNTRL);
      case "XDigit":
        return new CharacterClassesNode(context, CharacterClasses.POSIX_XDIGIT);
      case "Space":
      case "IsWhite_Space":
        return new CharacterClassesNode(context, CharacterClasses.POSIX_SPACE);
      default:
        throw new IllegalArgumentException("Undefined POSIX character classes \\p{" + name + "}.");
    }
  }

  /**
   * Parsing character classes.
   * Current supports 1, 2 and 3 parts.
   * 1.'[abc]'          a, b, or c (simple class)
   * 2.'[^abc]'         Any character except a, b, or c (negation)
   * 3.'[a-zA-Z]'       a through z or A through Z, inclusive (range)
   * 4.'[a-d[m-p]]'     a through d, or m through p: [a-dm-p] (union)
   * 5.'[a-z&&[def]]'   d, e, or f (intersection)
   * 6.'[a-z&&[^bc]]'   a through z, except for b and c: [ad-z] (subtraction)
   * 7.'[a-z&&[^m-p]]'  a through z, and not m through p: [a-lq-z](subtraction)
   *
   * @return character classes node.
   */
  private PatternNode nextCharacterClasses() {
    boolean negated;
    int position = buffer.position();
    if (peek() == '^') {
      negated = true;
    } else {
      negated = false;
      buffer.position(position);
    }
    boolean hasBegin = false, isRange = false;
    int peek, begin = Character.MAX_CODE_POINT;
    CharacterClasses classes = new CharacterClasses();
    while (true) {
      switch (peek = peek()) {
        case ']':
          if (hasBegin) {
            classes.union(new CharacterRange(begin));
          }
          if (!negated) {
            return new CharacterClassesNode(context, classes);
          } else {
            return new CharacterClassesNode(context, classes.except());
          }
        case '-':
          if (hasBegin) {
            isRange = true;
          } else {
            classes.union(new CharacterRange(peek));
          }
          break;
        case '\\':
          PatternNode node = nextEscapeOrPredefinedCharacterClasses();
          if (node instanceof CharacterNode) {
            peek = ((CharacterNode) node).codePoint();
          } else if (node instanceof CharacterClassesNode) {
            classes.unions(((CharacterClassesNode) node).classes());
            break;
          } else {
            throw new IllegalStateException("Illegal meat or escape: " + node);
          }
        default:
          if (isRange) {
            isRange = false;
            hasBegin = false;
            classes.union(new CharacterRange(begin, peek));
          } else {
            if (hasBegin) {
              classes.union(new CharacterRange(begin));
            }
            begin = peek;
            hasBegin = true;
          }
          break;
      }
    }
  }
}
